from pyspark import SparkConf
from pyspark import SparkContext

if __name__ == '__main__':
    # Local word-grouping demo: read a comma-separated text file, split it
    # into words, pair each word with a count of 1, and group the pairs by
    # word.
    conf = SparkConf().setMaster("local[*]").setAppName("spark01")
    sc = SparkContext(conf=conf)
    try:
        rdd1 = sc.textFile('../data/test1.txt')  # rdd1 holds only three lines of data
        rdd2 = rdd1.flatMap(lambda line: line.split(','))
        rdd3 = rdd2.map(lambda word: (word, 1))  # (K, V) pair RDD

        # groupByKey yields (K, ResultIterable[V]); materialize each group
        # as a plain list so collect() prints the actual grouped values
        # instead of opaque ResultIterable reprs.
        rdd4 = rdd3.groupByKey().mapValues(list)

        print(rdd4.collect())
    finally:
        # Always release the SparkContext so the local driver/JVM shuts
        # down cleanly even if a job above fails.
        sc.stop()
